#!/bin/bash

export BOWTIE_INDEXES="/data/DATA/indexes"

if [ "${1}" == "" -o "${3}" == "" ]; then
  echo "
 Programm usage:"
  echo " 
 $0 -bedtophatmsql <OPTIONS>
 $0 -dotophat <OPTIONS>
 $0 -bedmsql <OPTIONS>
 $0 -bedmsql_pair <OPTIONS>
 $0 -bowtiehits <OPTIONS>
 $0 -bowtiebam <OPTIONS>
 $0 -dosicer <OPTIONS>
 $0 -doquest <OPTIONS>
 $0 -doaverage <OPTIONS>

  OPTIONS:
   <NAME> <GENOME>
  "
 exit 
fi

export BED_SITESHIFT=75
export BED_WINDOW=200
export TYPE=DNA

case ${1} in
  *tophat*)
   export BED_SITESHIFT=0
   export TYPE=RNA
   export BED_WINDOW=20
  ;;
  *pair*)
   export BED_SITESHIFT=0
  ;;
esac


export RUN_ON="${3}" #

#export GROUP="joe"
export BED_FORMAT=4 # 4/8

case ${2} in
  "MARK")
    export GROUP="marcroch"
   #export GROUP="markroch1"
    export ADD_TOPHAT=" -T "
  ;;
  "YRINA")
   export GROUP="barskii2"
   export ADD_TOPHAT=" --library-type fr-firststrand " #DUTP
   export BED_FORMAT=8
  ;;
  "BARSKI")
   export GROUP="barskii2"
   export ADD_TOPHAT=" "
  ;;
  "CLARK")
   export GROUP="barskii2"
   export ADD_TOPHAT=" "
  ;;
  "SATOSHI")
    export GROUP="NamekawaSatoshi"
    export ADD_TOPHAT=" -T "
  ;;
  "FILIPI")
   export GROUP="filipi"
  ;;
  "NIH")
   export GROUP="barskii5" #CD4 substets RNA-seq
   #export GROUP="barskii4" #CD4 substets chromatin
   export ADD_TOPHAT=" --segment-length 18 "
  ;;
  "TEST")
   export GROUP="test"
  ;;
  "RAHUL")
   export GROUP="rahul"
   export ADD_TOPHAT=" -T "
  ;;
  "JOE")
   export GROUP="joe"
   export ADD_TOPHAT=" -T "
  ;;
  *)
  echo "Unknown name"
  exit
  ;;
esac


export WORK_DIR="/data/DATA/FASTQ-DATA/${2}/${TYPE}"



#export RUN_ON="rebo" #
#export RUN_ON="hg19c" #
#export RUN_ON="hg19" #
#export RUN_ON="mm9" #

# "-5 6" #- tophat trim 6 bases from 5'
export ADD_TOPHAT=" -g 1 --no-novel-juncs ${ADD_TOPHAT} "

case ${RUN_ON} in 
 "hg19")
   export GENOME=hg19
   export TRANSCRIPTOME=" --transcriptome-index ${BOWTIE_INDEXES}/gtf/hg19_refsec_genes "
   export GFT_FILE="-G ${BOWTIE_INDEXES}/gtf/hg19_refsec_genes.gtf"
#   export TRANSCRIPTOME=" --transcriptome-index ${BOWTIE_INDEXES}/hg19_genes"
#   export GFT_FILE="-G ${BOWTIE_INDEXES}/gtf/hg19_genes.gtf"
 ;;
 "hg19c")
   export GENOME=hg19c
   export GFT_FILE=" -G ${BOWTIE_INDEXES}/gtf/hg19_refsec_genes_control.gtf "
   export TRANSCRIPTOME=" --transcriptome-index ${BOWTIE_INDEXES}/gtf/hg19_refsec_genes_control "
 ;;
 "mm9")
   export GENOME=mm9
   export SQLDB=" -sql_dbname=mm9 "
   export GFT_FILE=" -G ${BOWTIE_INDEXES}/gtf/mm9_refsec_genes_2012.gtf "
   export TRANSCRIPTOME=" --transcriptome-index ${BOWTIE_INDEXES}/gtf/mm9_refsec_genes_2012 "
#    export TRANSCRIPTOME=" --keep-tmp  "
 ;;
 "hg19_lupus")
   export GENOME=hg19_lupus
 ;;
 "rebo")
   export GENOME=rebo
   export SUFFIX="_rebo"
 ;;
 *)
 echo "Incorect running parameter"
 exit
 ;;
esac

export BOWTIE_PATH="bowtie"
export TOPHAT_PATH="tophat2"


#module load bowtie/0127 2>/dev/null
#module load python/2.7.1 2>/dev/null
#module load tophat/141 2>/dev/null

#
# Aligment bowtie result to bam & hits
#
#
function do_bw_bam {

ZNA=${NA}.gz

if [ -f ./${ZNA} ]; then
 echo "${ZNA} exist"
 ZC=zcat
else
 ZNA=${NA}
 ZC=cat
fi

case ${1} in
 "bam") 
   if [ -f ./${NA}${SUFFIX}.bam ]; then
    echo "${NA}${SUFFIX}.bam exist"
   else 
    $ZC ${ZNA}|${BOWTIE_PATH} -q -v 2 -m 1 --best --strata -p 24 -S ${TRIM} \
    ${GENOME} - 2>./${NA}${SUFFIX}.bw.log|
    samtools view -Sb - >./${NA}${SUFFIX}.bam 2>/dev/null
    res=$?
    echo "Making .bam complete $res"
    return $res
   fi
  ;;
  "hits")
    if [ -f ./${NA}${SUFFIX}.hits -o -f ./${NA}${SUFFIX}.hits.gz ]; then
     echo "${NA}${SUFFIX}.hits exist"
    else 
     $ZC ${ZNA}|${BOWTIE_PATH} -q -v 2 -m 1 --best --strata -p 24 ${TRIM} \
     ${GENOME} - 2>./${NA}${SUFFIX}.bw_h.log >./${NA}${SUFFIX}.hits
     echo "Making .hits complete"
   fi
  ;;
esac

}

#
# Aligment bowtie result to bam & hits
#
#
function do_bw_bam_pair {


case ${1} in
 "bam") 
   if [ -f ./${NA}${SUFFIX}.bam ]; then
    echo "${NA}${SUFFIX}.bam exist"
   else 
    ${BOWTIE_PATH} -q -v 2 -m 1 --best --strata -p 24 -S ${TRIM} \
    ${GENOME} -1 ${NA} -2 ${NA2} 2>./${NA}${SUFFIX}.bw.log|
    samtools view -Sb - >./${NA}${SUFFIX}.bam 2>/dev/null
    echo "Making .bam complete"
   fi
  ;;
  "hits")
    if [ -f ./${NA}${SUFFIX}.hits -o -f ./${NA}${SUFFIX}.hits.gz ]; then
     echo "${NA}${SUFFIX}.hits exist"
    else 
     ${BOWTIE_PATH} -q -v 2 -m 1 --best --strata -p 11 ${TRIM} \
     ${GENOME} -1 ${NA} -2 ${NA2} 2>./${NA}${SUFFIX}.bw_h.log >./${NA}${SUFFIX}.hits
     echo "Making .hits complete"
   fi
  ;;
esac

}


#
# RNA seq aligment by tophat to bam
#
#

function do_tophat_bam {

export c=1
while [ -d ${NA}_${c} ]; do  let c=$c+1; done

FNAME="${NA}_${c}"

   if [ -f ./${NA}.bam ]; then
    echo "${NA}.bam exist"
   else 
    ${TOPHAT_PATH} --num-threads 24 -o ${FNAME} ${GFT_FILE} ${ADD_TOPHAT} ${TRANSCRIPTOME} ${BOWTIE_INDEXES}/${GENOME} ${NA}

    mv ./${FNAME}/accepted_hits.bam ./${NA}.bam
    res=$?
    echo "Making .bam by tophat complete $res"
    return $res
   fi
}


#
# if bam file does not exist for fastq file it generates by bowtie in the same directory
# function for uploading generated bams by bam2bedgraph into genome browser
#
function upload_bed_mysql {

 do_bw_bam "bam"
 if [ $? -eq 0 ]; then
  bam2bedgraph -sql_table="${TBLN}" -bed_trackname="${TBL}" -in="${NA}.bam" -out="${NA}.out" -log="${NA}.log" \
   -sql_grp="${GROUP}" -bed_window=${BED_WINDOW} -bed_siteshift=${BED_SITESHIFT} -bed_format=${BED_FORMAT} -no-bed-file  ${SQLDB}
 else
  echo "Error making bam file"
 fi
}

function upload_bed_mysql_pair {

 do_bw_bam_pair "bam"
 #replace +,=,-,\. on _
# TBL="${NA//[+|=|\-|\.]/_}"
# ${REPO_BASE}/genome-tools/src/bam2bedgraph/bam2bedgraph -sql_table="${TBL}" -in="${NA}.bam" -out="${NA}.out" -log="${NA}.log" -sql_grp="${GROUP}" -bed_window=${BED_WINDOW} -bed_siteshift=${BED_SITESHIFT} -bed_format=${BED_FORMAT} -no-bed-file  ${SQLDB}
}

#
# if bam file does not exist for fastq file it generates by tophat2 in the same directory
# function for uploading generated bams by bam2bedgraph into genome browser
#
function upload_tophat_bed_mysql {

 do_tophat_bam
 res=$?
 echo "Debug tophat return $res"
 
 if [ $res -eq 0 ]; then
 
  bam2bedgraph -sql_table="${TBLN}" -in="${NA}.bam" -out="${NA}.out" -log="${NA}.log" -bed_trackname="${TBL}" \
   -sql_grp="${GROUP}" -bed_window=${BED_WINDOW} -bed_format=${BED_FORMAT} -no-bed-file ${SQLDB}
  if [ $? -eq 0 ]; then
   LC=`wc -l ${NA}|awk '{print $1/4}'`
   grep 'Aligned:' ${NA}.log |awk -v tot=${LC} -F'Aligned: ' '{printf("Total: %d\nAligned: %d\nPercent: %f\n",tot,$2,$2*100/tot);}' >${NA}.stat
  else
   echo "Error uploading tophat bed"   
  fi
 else
  echo "Error in making tophat bam"
 fi
}


function do_bowtie_hits {
 do_bw_bam "hits"
}


function do_average {
 do_bw_bam "bam"
 do_bed_file "${NA}"
 echo "do average not done yet"
 exit 
 ${REPO_BASE}/genome-tools/ 
}

function do_sicer {
 do_bw_bam "bam"

 T=1
 W=200
 G=200
 E=100
# LIMIT=8100000
 if [ -f "./${NA}_s.bed" ]; then
  echo "${NA}_s.bed exist"
 else
  if [ ${LIMIT} -gt 0 ]; then
   bamToBed -bed12 -i ${NA}.bam |head -${LIMIT} >./${NA}_s.bed
  else
   bamToBed -bed12 -i ${NA}.bam >./${NA}_s.bed
  fi
 fi

# if [ -f "./SICER/${NA}_s-W200-G0-E100.scoreisland" ]; then
#  echo "./SICER/${NA}_s-W200-G0-E100.scoreisland exist"
 if [ -f "./SICER/${NA}_s-W${W}-G${G}-E${E}.scoreisland" ]; then
  echo "./SICER/${NA}_s-W${W}-G${G}-E${E}.scoreisland exist"
  cd ./SICER
  #BedLoaderSicer.py ${NA} ${GROUP} hg19
  cd ..
 else
  mkdirhier ./SICER/
  #sh ${REPO_BASE}/genomebrowser/thirdparty/SICER1.1/SICER/SICER-rb.sh . ${NA}_s.bed ./SICER hg19 1 50 150 0.74 50 0.1
  SICER-rb.sh . ${NA}_s.bed ./SICER $GENOME ${T} ${W} 150 0.74 ${G} ${E}
 fi
 
# cd ./SICER/
# ${REPO_BASE}/genomebrowser/scripts/BedLoaderScicer.py ${NA}_s ${GROUP} ${GENOME}

# sh /usr/src/SICER1.1/SICER/SICER-rb.sh . ${NA}.bed ./OUT hg19 1 200 150 0.74 0 100 &
# mkdirhier /mnt/sf_G_DRIVE/FASTQ-DATA/results/${NA}/OUT
# grep -v 'track' output.bed|awk '{ if($4<0) $4=$4*(-1); printf("%s\t%s\t%s\tU%s\t%s\t%s\n",$1,$2,$3,$5,$4,$6);}' >/mnt/sf_G_DRIVE/FASTQ-DATA/results/${NA}/${NA}.bed

# mkdirhier /mnt/sf_G_DRIVE/FASTQ-DATA/results/${NA}/OUT
# grep -v 'track' output.bed|awk '{ if($4<0) $4=$4*(-1); printf("%s\t%s\t%s\tU%s\t%s\t%s\n",$1,$2,$3,$5,$4,$6);}' >/mnt/sf_G_DRIVE/FASTQ-DATA/results/${NA}/${NA}.bed
# hg19 1 ["window size (bp)"] ["fragment size"] ["effective genome fraction"] ["gap size (bp)"] ["E-value"]
# /usr/src/REPO/genomebrowser/scripts/BedLoader.py ${NA}
# cd /mnt/sf_G_DRIVE/FASTQ-DATA/results/${NA}/OUT/
# /usr/src/REPO/genomebrowser/scripts/BedLoaderScicer.py ${NA}
# sh /usr/src/SICER1.1/SICER/SICER-rb.sh . ${NA}.bed ./OUT hg19 1 200 150 0.74 0 100 &
}


function do_quest {
 echo "depricated"
 exit
 do_bw_bam "hits"
 quest_dir=`basename ${NA} .fastq`
 quest_dir=${quest_dir}.quest
 if [ -d ${quest_dir} ]; then
  echo "Quest dir [${quest_dir}] already exist."
 else
  ${REPO_BASE}/genome-tools/thirdparty/QuEST_2.4/generate_QuEST_parameters.pl \
  -bowtie_align_ChIP ./${NA}.hits -rp ${ROOT_DIR}/genomes/${GENOME}/ \
  -gt ${ROOT_DIR}/b_indexes/hg19_tb -ap ./${quest_dir}
 fi
}


function do_fence {

 if [ -f ./${1}.fence ]; then
  echo "${1}.fence exist"
 else 
  fence.py --in=${NA} >${NA}.fence &
  echo "Making ${1}.fence backgrounded"
 fi

}


#
# MAIN Function
#



cd ${WORK_DIR}
 for i in `ls -c LIST*`; do
  export LST=${i}
  break
 done

if [ "${LST}" == "" ]; then
 echo "Cannot find LIST with fastq files"
 exit
fi 


# Create new file handle 5
exec 5< ${LST}

while read i <&5 ; do
#skeep lines started with #
echo "${i}"|grep -v "^#" >/dev/null
[ $? -eq 1 ] && continue

 export NA=${i}
 echo ""
 echo "_________________________________________________________________"
 echo $NA 

 export TBLN="${NA//[+|=|\-|\.]/_}"
 TBL="${NA//run*=/}"
 TBL="${TBL//.fastq/}"
 export TBL="${TBL//[+|=|\-|\.|_]/ }"

 cd ${WORK_DIR} #/${NA}/

 do_fence "${NA}"


case ${1} in
  "-bedtophatmsql")
   upload_tophat_bed_mysql
  ;;
  "-dotophat")
   do_tophat_bam
  ;;
  "-bedmsql")
   upload_bed_mysql
  ;;
  "-bedmsql_pair")
   read i2 <&5
    echo $i $i2
    export NA2=${i2}
   upload_bed_mysql_pair
  ;;
  "-bowtiehits")
   do_bowtie_hits
  ;; 
  "-bowtiebam")
   do_bw_bam "bam"
  ;;
  "-dosicer")
   do_sicer
  ;;
  "-doquest")
   do_quest
  ;;
  "-doaverage")
   do_average
  ;;
esac

done




